/*******************************************************************************

  This code compiles Head Start enrollment data for Boston programs.

*******************************************************************************/

* Run switches: 1 = run the step, any other value = skip it.
* Indented switches name sub-steps of the switch above them.
* NOTE(review): confirm that every switch below is actually consulted by the
* section it names before relying on it to skip work.

* Pre-2008 pipeline
local clean_pre2008 = 1
	* program tables
	local program_2008 = 1
	* enrollment tables
	local enroll_2008 = 1
	* program/enrollment merge and yearly totals
	local merge_2008 = 1

* Post-2008 pipeline
local clean_post2008 = 1
	* program details
	local program_post2008 = 1
	* enrollment (Section A)
	local enroll_post2008 = 1
	* program/enrollment merge and yearly totals
	local merge_post2008 = 1

* Final stacking of the pre- and post-2008 yearly totals
local append = 1

/* Clean Headstart Data - pre-2008 */
if `clean_pre2008' == 1 {
	* Build the pre-2008 program file: import each year's raw program table
	* (1994-2008), harmonize the variable names, stack all years, keep
	* Massachusetts programs and flag the Boston-area cities (bps).
	if `program_2008' == 1 {
		* Step 1: convert each raw yearly table to .dta with common variable names
		forval y = 1994(1)2008 {
			display in red "imported year: `y'"
			* Raw files are named with the two-digit year (94, ..., 08)
			local y_mod = mod(`y', 100)
			local y_mod_str = string(`y_mod', "%02.0f")
			display in red "imported year stem: `y_mod_str'"
			* NOTE(review): files carry a .csv extension but are read as tab-delimited
			insheet using "$raw_data_headstart/tbl`y_mod_str'Programs.csv", tab names clear
			gen year = `y'

			* The raw column names differ by vintage; map each layout onto
			* name / address / city / state / zip / agency_type
			if inlist(`y', 1994, 2000, 2001) {
				rename q1a name
				rename q1b address
				rename q1c city
				rename q1d state
				* zip (q1e) may be absent in some of these years, hence capture
				cap rename q1e zip
				rename q4 agency_type
			}
			else if (`y' == 1995) {
				* Prefix the bare letter columns with q1 so they match the q1* layout
				foreach s of varlist a b c d e f g1 g2 {
					cap rename `s' q1`s'
				}
				rename q1a name
				rename q1b address
				rename q1c city
				rename q1d state
				rename q1e zip
				rename q4 agency_type
			}
			else if (`y' >= 1996 & `y' <= 1999) {
				rename a name
				rename b address
				rename c city
				rename d state
				rename e zip
				rename q4 agency_type
				* Strip hyphens (ZIP+4) when zip was read as a string, then make it numeric
				cap replace zip = subinstr(zip, "-", "", .)
				destring zip, replace
			}
			else if (`y' >= 2002) {
				rename q03 prog_type
				rename q04 name
				rename q05 address
				rename q06 address2
				rename q07 city
				rename q08 state
				rename q09 zip
				rename q19 agency_type
			}

			* NOTE(review): zip is only forced to numeric for 1996-1999; confirm it has
			* the same storage type in the other years, since append cannot combine
			* string and numeric versions of a variable without the force option.
			keep year grnum delnum name address city state zip agency_type
			save "$stata_data_headstart/tbl`y_mod_str'Programs.dta", replace
		}

		* Step 2: stack the yearly files. append works on an empty dataset, so no
		* placeholder observation is needed.
		clear
		forval y = 1994(1)2008  {
			local y_mod = mod(`y', 100)
			local y_mod_str = string(`y_mod', "%02.0f")
			display in red "imported year stem: `y_mod_str'"
			append using "$stata_data_headstart/tbl`y_mod_str'Programs.dta"
		}
		* Drop rows without a grant number
		drop if grnum == ""

		* Subset to Massachusetts programs
		replace city = upper(city)
		keep if state == "MA"
		tab city

		* Use BPS listing of neighborhoods to identify relevant neighborhoods: https://www.bostonpublicschools.org/Page/7433
		gen bps = inlist(city, "CHARLESTOWN", "DORCHESTER", "BOSTON", "ROXBURY", "BOSTON (ROXBURY)")
		tab bps

		* Each program (grnum, delnum) must appear at most once per year
		isid grnum delnum year

		save "$stata_data_headstart/headstart_programs_MA", replace
	}

	/* Clean Enrollment Data - Pre-2008 */

	* Build the pre-2008 enrollment file: import each year's raw enrollment
	* table (1994-2008), harmonize the funded/actual enrollment variable
	* names, and stack all years.
	if `enroll_2008' == 1 {
		clear
		* Compile enrollment data pre-2008
		* Save as .dta files
		forval y = 1994(1)2008 {
			display in red "imported year: `y'"
			* Raw files are named with the two-digit year (94, ..., 08)
			local y_mod = mod(`y', 100)
			local y_mod_str = string(`y_mod', "%02.0f")
			display in red "imported year stem: `y_mod_str'"
			if `y' <= 2001 {
				display in red "year: `y'"
				if `y' < 1995 {
					* 1994 is tab-delimited and already uses the q9* names
					insheet using "$raw_data_headstart/tbl`y_mod_str'Enrollment.csv", tab names clear
				}
				else {
					* 1995-2001 are comma-delimited; prefix every column with q9 so
					* the names match the 1994 layout, then restore the key variables
					insheet using "$raw_data_headstart/tbl`y_mod_str'Enrollment.csv", comma names clear
					cap rename * q9*
					cap rename q9grnum grnum
					cap rename q9delnum delnum
					cap rename q9GRNUM grnum
					cap rename q9A q9a
				}
				* Shared q9* layout for 1994-2001
				rename q9a total_funded
				rename q9b acyf_funded
				rename q9c1 act_less1year
				rename q9c2 act_1year
				rename q9c3 act_2year
				rename q9c4 act_3year
				rename q9c5 act_4year
				rename q9c6 act_5year
			}
			else {
				* 2002-2008 use the a02/a04/a16* layout
				insheet using "$raw_data_headstart/tbl`y_mod_str'Enrollment.csv", tab names clear
				cap rename GRNUM grnum
				cap rename DELNUM delnum
				rename a02 acyf_funded
				rename a04 total_funded
				rename a16a act_less1year
				rename a16b act_1year
				rename a16c act_2year
				rename a16d act_3year
				rename a16e act_4year
				rename a16f act_5year
			}
			gen year = `y'
			keep grnum delnum year total_funded acyf_funded act_*
			save "$stata_data_headstart/tbl`y_mod_str'Enrollment.dta", replace
		}

		* Append the yearly files. Start from an empty dataset rather than a
		* one-observation placeholder: the placeholder was never dropped and ended
		* up as an all-missing row in the saved enrollment file.
		clear
		forval y = 1994(1)2008  {
			local y_mod = mod(`y', 100)
			local y_mod_str = string(`y_mod', "%02.0f")
			display in red "imported year stem: `y_mod_str'"
			append using "$stata_data_headstart/tbl`y_mod_str'Enrollment.dta"
		}

		* NOTE(review): despite the _MA suffix this file is not subset to
		* Massachusetts here.
		save "$stata_data_headstart/headstart_enrollment_MA", replace
	}

	/* Merge Pre-2008 data */
	* Attach enrollment to the Massachusetts program file, then total the
	* actual enrollment of 4-year-olds by year for the Boston-area programs.
	* Gated on merge_2008 so the switch declared at the top of the file is honored.
	if `merge_2008' == 1 {
		use "$stata_data_headstart/headstart_programs_MA.dta", clear
		* keep(1 3): keep every program row, with or without an enrollment match
		merge 1:m grnum delnum year using "$stata_data_headstart/headstart_enrollment_MA", keep(1 3) nogen
		save "$stata_data_headstart/headstart_programs_enrollment_MA.dta", replace

		* Note: year refers to spring. So Academic year should be year-1 to year AY
		order year
		keep if bps == 1

		collapse (sum) act_4year, by(year)

		save "$stata_data_headstart/pre_2008_headstart_tabs.dta", replace
	}
}

/* Clean Headstart Data - post-2008 */

* Build the post-2008 program file: import the "Program Details" sheet of each
* yearly PIR export (2009-2019), stack the years, keep Massachusetts programs
* and flag the Boston-area cities (bps).
* Also gated on clean_post2008, mirroring how clean_pre2008 wraps the pre-2008 steps.
if `clean_post2008' == 1 & `program_post2008' == 1 {
	* Save program information (such as addresses)
	forval y = 2009/2019 {
		* Exports are .xls through 2014 and .xlsx afterwards
		local ext = cond(`y' <= 2014, "xls", "xlsx")
		import excel using "$raw_data_headstart/pir_export_`y'.`ext'", sheet("Program Details") firstrow clear
		gen year = `y'
		save "$stata_data_headstart/program_`y'", replace
	}

	* Append program information. append works on an empty dataset, so no
	* placeholder observation is needed.
	clear
	forval y = 2009/2019 {
		* Forward slash keeps the path portable and matches the rest of the file
		append using "$stata_data_headstart/program_`y'"
	}
	* Drop rows without a grant number
	drop if GrantNumber == ""

	* Subset to Massachusetts programs
	replace ProgramCity = upper(ProgramCity)
	keep if ProgramState == "MA"
	tab ProgramCity

	* Use BPS listing of neighborhoods to identify relevant neighborhoods: https://www.bostonpublicschools.org/Page/7433
	gen bps = inlist(ProgramCity, "CHARLESTOWN", "DORCHESTER", "BOSTON", "ROXBURY", "BOSTON (ROXBURY)")
	tab bps

	save "$stata_data_headstart/headstart_programs_MA_post2008", replace
}

* Build the post-2008 enrollment file: import the "Section A" sheet of each
* yearly PIR export (2009-2019), keep the actual enrollment of 4-year-olds,
* and stack the years.
* Also gated on clean_post2008, mirroring how clean_pre2008 wraps the pre-2008 steps.
if `clean_post2008' == 1 & `enroll_post2008' == 1 {
	* Save enrollment data in each year
	forval y = 2009/2019 {
		* Exports are .xls through 2014 and .xlsx afterwards
		local ext = cond(`y' <= 2014, "xls", "xlsx")

		* The column renamed to act_4year changes label across export vintages
		if `y' <= 2010 {
			local src_4year A10e
		}
		else if `y' <= 2014 {
			local src_4year A12e
		}
		else {
			local src_4year A13e
		}

		* cellrange(A2) + firstrow: variable names are taken from row 2 of the sheet
		import excel using "$raw_data_headstart/pir_export_`y'.`ext'", sheet("Section A") cellrange(A2) firstrow clear
		rename `src_4year' act_4year

		keep GrantNumber ProgramNumber Type Program act_4year
		gen year = `y'
		save "$stata_data_headstart/enrollment_`y'", replace
	}

	* Append enrollment data. append works on an empty dataset, so no
	* placeholder observation is needed.
	clear
	forval y = 2009/2019 {
		append using "$stata_data_headstart/enrollment_`y'"
	}
	* Drop rows without a grant number
	drop if GrantNumber == ""
	* Each program must appear at most once per year (required by the 1:1 merge)
	isid GrantNumber ProgramNumber year
	* NOTE(review): despite the _MA suffix this file is not subset to
	* Massachusetts here.
	save "$stata_data_headstart/headstart_enrollment_MA_post2008", replace
}

* Attach post-2008 enrollment to the Massachusetts program file, then total the
* actual enrollment of 4-year-olds by year for the Boston-area programs.
* Also gated on clean_post2008, mirroring how clean_pre2008 wraps the pre-2008 steps.
if `clean_post2008' == 1 & `merge_post2008' == 1 {
	use "$stata_data_headstart/headstart_programs_MA_post2008", clear
	* keep(1 3): keep every program row, with or without an enrollment match
	merge 1:1 GrantNumber ProgramNumber year using "$stata_data_headstart/headstart_enrollment_MA_post2008", keep(1 3) nogen
	save "$stata_data_headstart/headstart_programs_enrollment_MA_post2008.dta", replace

	* Note: year refers to spring. So Academic year should be year-1 to year AY
	order year
	keep if bps == 1

	collapse (sum) act_4year, by(year)

	save "$stata_data_headstart/post_2008_headstart_tabs.dta", replace
}

* Stack the post-2008 and pre-2008 yearly totals into one file ordered by year.
if (`append' == 1) {
	* Load the post-2008 totals first, then add the pre-2008 totals below them
	clear
	use "$stata_data_headstart/post_2008_headstart_tabs.dta"
	append using "$stata_data_headstart/pre_2008_headstart_tabs.dta"

	* The file saved below is the one to paste into the raw "HS Enrollment" tab of the final Excel deck
	sort year
	save "$stata_data_headstart/all_years_headstart_tabs.dta", replace
}
